rm(list=ls(all=TRUE))
library(foreign)
library(stats)
library(IDPmisc)
library(reshape)
library(MASS)
library(ggplot2)
library(quantreg)

#Opening the databases. In this case, governor elections are considered as the comparison group
oax10=read.dta("/Users/Cantu/Documents/Yo/Proyectos/Dissertation/R codes Fraud/CodesAJPS/workingdata/Oaxaca2010.dta")

#Getting proportions and turnout rates
oax10[is.na(oax10)]<-0

oax10$totvot=oax10$pan+oax10$pri+oax10$prd+oax10$pvem+oax10$pt+oax10$pc+oax10$pup+oax10$pna+oax10$cnr+oax10$nulos
oax10$efectv=oax10$totvot-oax10$nulos-oax10$cnr
oax10$part=oax10$totvot/oax10$listanominal
oax10$eviel=oax10$pri+oax10$pvem
oax10$gabino=oax10$pan+oax10$prd+oax10$pt+oax10$pc
oax10$peviel=(oax10$pri+oax10$pvem)/oax10$totvot
oax10$pgabino=(oax10$pan+oax10$prd+oax10$pt+oax10$pc)/oax10$totvot
oax10$pPUP=oax10$pup/oax10$totvot
oax10$pPANAL=oax10$pna/oax10$totvot
oax10$pPRI=oax10$pri/oax10$totvot
oax10$pPVEM=oax10$pvem/oax10$totvot
oax10$pPAN=oax10$pan/oax10$totvot
oax10$pPRD=oax10$prd/oax10$totvot
oax10$pCONV=oax10$pc/oax10$totvot
oax10$pPT=oax10$pt/oax10$totvot
oax10$pnulos=oax10$nulos/oax10$totvot
oax10$pnoreg<-oax10$cnr/oax10$totvot


oax10$partE=oax10$efectv/oax10$listanominal
oax10$pevielE=(oax10$pri+oax10$pvem)/oax10$efectv
oax10$pgabinoE=(oax10$pan+oax10$prd+oax10$pt+oax10$pc)/oax10$efectv
oax10$pPUPe=oax10$pup/oax10$efectv
oax10$pPANALe=oax10$pna/oax10$efectv
oax10$pPRIe=oax10$pri/oax10$efectv
oax10$pPVEMe=oax10$pvem/oax10$efectv
oax10$pPANe=oax10$pan/oax10$efectv
oax10$pPRDe=oax10$prd/oax10$efectv
oax10$pCONVe=oax10$pc/oax10$efectv
oax10$pPTe=oax10$pt/oax10$efectv


#########
oax10[is.na(oax10)]<-0



#Getting rid out of the polling stations which consider voters outside the electoral precincts.


oax10$casilla[oax10$casilla=="Ex - C1"]<-NA
oax10$casilla[oax10$casilla=="Ext"]<-NA
oax10$casilla[oax10$casilla=="E"]<-NA
oax10$casilla[oax10$casilla=="Ext1"]<-NA
oax10$casilla[oax10$casilla=="Ext2"]<-NA
oax10$casilla[oax10$casilla=="X1"]<-NA
oax10$casilla[oax10$casilla=="X1 C1"]<-NA
oax10$casilla[oax10$casilla=="X1C1"]<-NA
oax10$casilla[oax10$casilla=="X2"]<-NA
oax10$casilla[oax10$casilla=="EXT"]<-NA
oax10$casilla[oax10$casilla=="EXT1"]<-NA
oax10$casilla[oax10$casilla=="EXT2"]<-NA



oax10$casilla[oax10$casilla=="Extraordinaria"]<-NA
oax10$casilla[oax10$casilla=="Extraordinaria2"]<-NA
oax10$totvot[oax10$totvot==0]<-NA
oax10$listanominal[oax10$listanominal==0]<-NA
oax10$seccion[oax10$seccion==0]<-NA

treated<-na.omit(oax10)

#Number of rows for both 
R<-nrow(treated)
T<-na.omit(treated)
T<-T[order(T$seccion) , ]
#####
T<-T[ which(T$pnulos<0.05),]

#####

T$count<-1

Rt<-nrow(T)

T$Cd1<-0
for (i in 1:Rt){ 
T$Cd1[i]<-rbinom(1,1,.5)}

T$difA<-NA
for (i in 1:Rt){ 
  if(T$Cd1[i]==1){
  	for (j in 1:Rt){ 
			if((T$seccion[i]==T$seccion[j]) & (i!=j)){
				T$difA[j]<-abs(T$partE[i]-T$partE[j])}}}}
				
threshold<-quantile(T$difA,p=.95, na.rm=TRUE)

T$T1<-0
for (i in 1:Rt){ 
for (j in 1:Rt){ 
if (T$seccion[i]==T$seccion[j]){
if ((T$part[i]-T$part[j])>threshold ){ 
T$T1[i]<-1
}}}}

T$C1<-0
for (i in 1:Rt){
if (T$T1[i]==1){
for (j in 1:Rt){
if (T$seccion[j]==T$seccion[i]){
if (T$T1[j]==0){
if ((T$partE[i]-T$partE[j])>threshold){ 
T$C1[j]<-1  
  }}}}}}

T$treatedsec<-0
for (i in 1:Rt){ 
if (T$T1[i]>=1){
for (j in 1:Rt){
if (T$seccion[j]==T$seccion[i]){
T$treatedsec[j]<-1}}}
}

T$placebo<-0
  for (j in 1:nrow(T)){
    if (T$treatedsec[j]==0)
      T$placebo[j]<-rbinom(1,1,.5)
  }


W<-T[ which(T$T1==0),]
W<-W[ which(W$placebo==0),]

W$pPRIE<-W$pevielE
W$pPRDE<-W$pgabinoE

expPRI<-aggregate(W$pPRIE, by=list(W$seccion), 
                     FUN=mean, na.rm=TRUE)  

#expPAN<-aggregate(W$pPANE, by=list(W$seccion), 
#                     FUN=mean, na.rm=TRUE) 
                     
expPRD<-aggregate(W$pPRDE, by=list(W$seccion), 
                    FUN=mean, na.rm=TRUE)                                           
                     
colnames(expPRI)<-c("seccion","exp") 
#colnames(expPAN)<-c("seccion","exp")                  
colnames(expPRD)<-c("seccion","exp")                  
                 

locseccion<-match(T$seccion,expPRI$seccion)
n<-length(locseccion)
T$expPRI<-NA
#T$expPAN<-NA
T$expPRD<-NA

for(i in 1:nrow(T)){
	for(j in 1:nrow(expPRI)){
 if (T$seccion[i]==expPRI$seccion[j]){
 T$expPRI[i]<-expPRI$exp[j]
#  T$expPAN[i]<-expPAN$exp[j]
 T$expPRD[i]<-expPRD$exp[j]
 }
}
}

T$exppri<-T$expPRI*T$efectv
#T$exppan<-T$expPAN*T$efectv
T$expprd<-T$expPRD*T$efectv

Z1<-subset(T, treatedsec!=1)

Z1$x<-Z1$exppri
Z1$y<-Z1$eviel

Z1$x[Z1$placebo==0]<-NA

B1a<-MCMCquantreg(y~x, data = Z1, tau=0.975, burnin = 1000, mcmc = 10000, thin = 1, verbose = 0, seed = sample(1:1000000,1), beta.start = NA, b0 = 0, B0 = 0)

B1aX0<-mean(B1a[,1])
B1aX1<-mean(B1a[,2])

B1b<-MCMCquantreg(y~x, data = Z1, tau=0.025, burnin = 1000, mcmc = 10000, thin = 1, verbose = 0, seed = sample(1:1000000,1), beta.start = NA, b0 = 0, B0 = 0)

B1bX0<-mean(B1b[,1])
B1bX1<-mean(B1b[,2])

A<-subset(T)

A$x<-A$exppri
A$y<-A$eviel

is.na(A$x[A$treatedsec==1 & A$T1==0])<-TRUE
is.na(A$x[A$treatedsec==0 & A$placebo==0])<-TRUE
is.na(A$y[A$treatedsec==1 & A$T1==0])<-TRUE
is.na(A$y[A$treatedsec==0 & A$placebo==0])<-TRUE

A$g<-A$treatedsec
A$g[A$g==1]<-"2. Analysis Group"
A$g[A$g==0]<-"1. Parallel"


p <- ggplot(A, aes(x, y)) 

         

pdf(file="/Users/Cantu/Documents/Yo/Proyectos/Dissertation/R codes Fraud/CodesAJPS/workingdata/OaxacaINCUMBENT.pdf")

p + geom_point() +
  theme_bw()+
#  aes(colour=A$DD)+
#  scale_colour_gradient(high = "yellow", low="black")+
  opts(legend.position = "none")+
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype=2)+
  geom_abline(intercept = B1bX0, slope = B1bX1, colour = "green")+
  geom_abline(intercept = B1aX0, slope = B1aX1, colour = "green")+
# geom_text(aes(label=seccion))+
  facet_wrap(~ g)+
  xlab("Expected votes")+
  ylab("Observed votes")+
  opts(title="PRI, Oaxaca, 2010 ")

dev.off()

##########################

Z1<-subset(T, treatedsec!=1)

Z1$x<-Z1$expprd
Z1$y<-Z1$gabino

Z1$x[Z1$placebo==0]<-NA

B1a<-MCMCquantreg(y~x, data = Z1, tau=0.975, burnin = 1000, mcmc = 10000, thin = 1, verbose = 0, seed = sample(1:1000000,1), beta.start = NA, b0 = 0, B0 = 0)

B1aX0<-mean(B1a[,1])
B1aX1<-mean(B1a[,2])

B1b<-MCMCquantreg(y~x, data = Z1, tau=0.025, burnin = 1000, mcmc = 10000, thin = 1, verbose = 0, seed = sample(1:1000000,1), beta.start = NA, b0 = 0, B0 = 0)

B1bX0<-mean(B1b[,1])
B1bX1<-mean(B1b[,2])

A<-subset(T)

A$x<-A$expprd
A$y<-A$gabino

is.na(A$x[A$suspSECC==1 & A$suspCASILLA==0])<-TRUE
is.na(A$x[A$suspSECC==0 & A$placebo==0])<-TRUE
is.na(A$y[A$suspSECC==1 & A$suspCASILLA==0])<-TRUE
is.na(A$y[A$suspSECC==0 & A$placebo==0])<-TRUE

A$g<-A$treatedsec
A$g[A$g==1]<-"2. Analysis Group"
A$g[A$g==0]<-"1. Parallel"


p <- ggplot(A, aes(x, y)) 

         

pdf(file="/Users/Cantu/Documents/Yo/Proyectos/Dissertation/R codes Fraud/CodesAJPS/workingdata/OaxacaCHALLENGER.pdf")

p + geom_point() +
  theme_bw()+
#  aes(colour=A$DD)+
#  scale_colour_gradient(high = "yellow", low="black")+
  opts(legend.position = "none")+
  geom_abline(intercept = 0, slope = 1, colour = "red", linetype=2)+
  geom_abline(intercept = B1bX0, slope = B1bX1, colour = "green")+
  geom_abline(intercept = B1aX0, slope = B1aX1, colour = "green")+
# geom_text(aes(label=seccion))+
  facet_wrap(~ g)+
  xlab("Expected votes")+
  ylab("Observed votes")+
  opts(title="PRD, Oaxaca, 2010 ")

dev.off()

write.dta(T, "/Users/Cantu/Documents/Yo/Proyectos/Dissertation/R codes Fraud/CodesAJPS/workingdata/OaxacaT.dta")